from lxml import etree
import cssselect

# Sample document shared by every example in this script.  It is bound to
# HTML_DOC rather than `str` so the built-in `str` type is not shadowed.
HTML_DOC = '''
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Document </title>
</head>
<body>
    <ul>
        <li><a href="www.baidu1.com">链接1</a></li>
        <li><a class="active"  href="www.baidu2.com">链接2</a></li>
        <li><a href="www.baidu3.com">链接3</a></li>
        <li><a href="www.baidu4.com">链接4</a></li>
        <li><a href="www.baidu5.com">链接5</a></li>
    </ul>
    <ol>
        <li><a href="www.baidu11.com">链接11</a></li>
        <li><a href="www.baidu22.com">链接22</a></li>
        <li><a href="www.baidu33.com">链接33</a></li>
    </ol>
</body>
</html>
'''

# Parse the markup once; `tree` is reused by the queries further down.
tree = etree.HTML(HTML_DOC)
print(tree)

# --- Disabled example: CSS selectors -------------------------------------
# titles = tree.cssselect('title')
# for title in titles:
#     print(title.text, title.attrib)

# print('=================')
# lis = tree.cssselect('li')
# for li in lis:
#     print(li.text, li.attrib, li.tag)
#
# print('=================')
# lis = tree.cssselect('ol li')
# for li in lis:
#     a = li.cssselect('a')[0]
#     print(a.text, a.attrib, a.tag)

# --- Disabled example: find / findall navigation --------------------------
# tree = etree.HTML(HTML_DOC)
#
# html = tree.find('body')
#
# ul = html.find('ul')
# print(ul.tag)
#
# for e in ul.findall('li'):
#     a = e.find('a')
#     print(a.get('href'),a.text)


# ul = tree.xpath('//ol')[0]

#
# print(ul.tag)
#
# for e in ul.xpath('.//a') :
#     print(e.get('href'))
#     print(e.xpath('./text()'),e.xpath('./@href'))




# ul = tree.xpath('//ol')[0]


# Select every <a> element whose class attribute contains "active";
# xpath() returns the matches as a list, which is printed as-is.
active_link_query = '//a[contains(@class,"active")]'
item = tree.xpath(active_link_query)
print(item)
#
# item = tree.xpath(' //li[position()>=2] ')
# print(item)
#
# print('=================')
# item = tree.xpath(' //a[starts-with(text(),"链接")]')
# print(item)

#
# print('=================')
# item = tree.xpath('//*[2]')
# for e in item:
#     print(e.tag)
# print(item)
#

#
# print('=================')
# item = tree.xpath('//*[last()]')
# for e in item:
#     print(e.tag)
# print(item)












